This notebook contains code to characterize the distribution of
demographic (response) variables in the VISUALIZATION VIBES project
Study #2 (survey). It relies upon data wrangling performed in
0_VIBES_S2_wrangling.Rmd.
In VIBES Study 2, participants completed an attitutde eliciation survey, asking questions about their attitude toward (5) stimulus images (data visualizations). Each participant was randomly assigned to one of 6 stimulus blocks, each containing 1 image from each of (4) categories (ranging from most abstract to most figural). Each participant started by responding to questions for a single ‘common block’ stimulus (that is thus super-powered as it was seen by all participants). Two participant recruitment pools were used: Prolific, with a smaller set of participants recruited from Tumblr (to replicate results of interview Study 1 with participants sourced from Tumblr).
We start by importing data files previously wrangled in
0_VIBES_S2_wrangling.Rmd.
############## IMPORT REFERENCE FILES
ref_stimuli <- readRDS("data/input/REFERENCE/ref_stimuli.rds")
ref_surveys <- readRDS("data/input/REFERENCE/ref_surveys.rds")
############## IMPORT DATA FILES
df_data <- readRDS("data/output/df_data.rds")
df_participants <- readRDS("data/output/df_participants.rds")
df_questions <- readRDS("data/output/df_questions.rds")
df_sd_questions_wide <- readRDS("data/output/df_sd_questions_wide.rds")
df_sd_questions_long <- readRDS("data/output/df_sd_questions_long.rds")
df_tools <- readRDS("data/output/df_tools.rds")
df_actions <- readRDS("data/output/df_actions.rds")
df_graphs_full <- readRDS("data/output/df_graphs_full.rds")
df_graphs <- readRDS("data/output/df_graphs.rds")
############## SETUP GRAPH LABELS
ref_stim_id <- levels(ref_stimuli$ID)
ref_cat_questions <- c("MAKER_ID","MAKER_AGE","MAKER_GENDER")
ref_free_response <- c("MAKER_DETAIL", "MAKER_EXPLAIN", "TOOL_DETAIL", "CHART_EXPLAIN")
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- c("MAKER_DESIGN","MAKER_DATA","MAKER_POLITIC",
"MAKER_ARGUE","MAKER_SELF","MAKER_ALIGN","MAKER_TRUST",
"CHART_LIKE", "CHART_BEAUTY", "CHART_INTENT", "CHART_TRUST")
left <- c("professional","professional","left-leaning","confrontational",
"altruistic","does NOT share","untrustworthy",
"NOT at all","NOT at all", "inform", "untrustworthy")
right <- c("layperson","layperson","right-leaning","diplomatic",
"selfish", "DOES share", "trustworthy",
"very much", "very much", "persuade", "trusthworthy")
ref_labels <- as.data.frame(cbind(left,right))
rownames(ref_labels) <- ref_sd_questions
# ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
ref_blocks <- c(1,2,3,4,5,6)
rm(left,right)
############## SETUP COLOUR PALETTES
#https://www.r-bloggers.com/2022/06/custom-colour-palettes-for-ggplot2/
## list of color pallettes
my_colors = list(
politics = c("#184aff","#5238bf", "#4f4a52" ,"#84649c", "#ff0000")
)
## function for using palettes
my_palettes = function(name, n, all_palettes = my_colors, type = c("discrete")) {
palette = all_palettes[[name]]
if (missing(n)) {
n = length(palette)
}
type = match.arg(type)
out = switch(type,
continuous = grDevices::colorRampPalette(palette)(n),
discrete = palette[1:n]
)
structure(out, name = name, class = "palette")
}
Participants were sampled from two survey distribution sources: direct solicitation from co-author MM’s ethnographic research participants on Tumblr, and a general sample of US-BASED, English-speaking participants on PROLIFIC.
df <- df_participants
title = "Number of participants by distribution and stimulus block"
cols = c("Sampling Platform", "Block-1","Block-2","Block-3","Block-4","Block-5","Block-6", "Sum")
cont <- table(df$Distribution, df$Assigned.Block)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Sampling Platform | Block-1 | Block-2 | Block-3 | Block-4 | Block-5 | Block-6 | Sum |
|---|---|---|---|---|---|---|---|
| PROLIFIC | 40 | 40 | 40 | 40 | 40 | 40 | 240 |
| TUMBLR | 15 | 12 | 12 | 14 | 13 | 12 | 78 |
| Sum | 55 | 52 | 52 | 54 | 53 | 52 | 318 |
# qacBase::crosstab(df, Distribution, Assigned.Block)
There is substantial variance in choice of social media platform across the sample, though the relative distribution across randomly assigned stimulus blocks within each sample is roughly uniform, and thus comparable. This may be an interesting predictive variable in modelling outcomes.
At the start of the study, participants were asked to choose one of five social media platforms to serve as the situational context for the questions they would be asked. (ie. Imagine you are scrolling through your [platform] feed).
#SET DATAFRAME
df = df_data
# PLATFORM by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = PLATFORM , fill = Distribution)) +
geom_bar() +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "PLATFORM CHOICE by SAMPLE",
caption = "",
y = "PLATFORM",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# PLATFORM by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(PLATFORM) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "PLATFORM CHOICE by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "PLATFORM") +
theme_minimal()
# PLATFORM by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(PLATFORM) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "PLATFORM CHOICE by SAMPLE and BLOCK",
caption = "As expected, similar distribution within SAMPLE across BLOCKS",
x = "Stimulus Block",
fill = "PLATFORM") +
theme_minimal()
df <- df_data
## BOXPLOT — SURVEY RESPONSE TIME
ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=duration.min, color=Assigned.Block))+
geom_boxplot(position=position_dodge(0.9))+
geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
facet_grid(Distribution ~.) +
coord_flip() +
labs( y = "Survey Response Time (mins)", x="",
title = "TOTAL Response Time by SAMPLE and BLOCK",
caption = "distributions similar across both SAMPLES and stimulus BLOCKS [1-6]") +
theme_minimal() + theme(legend.position = "none")
## RIDGEPLOT — SURVEY RESPONSE TIME
ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
geom_density_ridges(scale=0.8) +
# geom_boxplot()+
stat_pointinterval()+
theme_ridges() +
scale_fill_discrete(direction=-1)+
facet_grid(Distribution ~.) +
theme_minimal() +
theme(legend.position = "none") +
labs( x = "Survey Response Time (mins)", y="",
title = "TOTAL Response Time by Sample",
caption = "distributions similar across SAMPLES and stimulus BLOCKS[1-6]")
## Picking joint bandwidth of 7.37
## Picking joint bandwidth of 11
p.desc.duration <- psych::describe(df_data %>% filter(Distribution=="PROLIFIC") %>% pull(duration.min))
t.desc.duration <- psych::describe(df_data %>% filter(Distribution=="TUMBLR") %>% pull(duration.min))
PROLIFIC SAMPLE (n = 240 ) participant response times ranged from 13.97 to 216.18 minutes, with a mean response time of 42.49 minutes, SD = 21.15.
TUMBLR SAMPLE (n = 78 ) participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 51.93 minutes, SD = 35.47.
Convenience sampling on both TUMBLR and PROLIFIC pools means we do not expect a uniform (i.e. equal) distribution of participants across age categories. We generally expect that the Tumblr may have more younger users; but expect that the distribution of participants in each age category is uniform within the sample source and across stimulus blocks.
#SET DATAFRAME
df = df_data
# AGE by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_age, fill = Distribution)) +
geom_bar() +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant AGE by SAMPLE",
caption = "",
y = "AGE",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# AGE by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_age) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant AGE by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "AGE") +
theme_minimal()
# AGE by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_age) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant AGE by SAMPLE and BLOCK",
caption = "As expected, similar distribution within SAMPLE across BLOCKS",
x = "Stimulus Block",
fill = "Age") +
theme_minimal()
As expected, the (smaller) sample of participants recruited from
Tumblr skewed toward younger ages than the broader Prolific sample. This
was expected based on the known demographic characteriztion of Tumblr
users. Most importantly, across both samples, the relative distribution
of age categories is roughly uniform and thus comparable.
Convenience sampling on both TUMBLR and PROLIFIC pools means we do not expect a uniform (i.e. equal) distribution of participants across gender categories. Based on known demographics of Tumblr we expect more of these users to identify as a category other than male or female. Due to random assignment to stimulus blocks, we expect that the distribution of participants in each gender category is uniform within the sample source and across stimulus blocks.
#SET DATAFRAME
df = df_data
# GENDER by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_gender, fill = Distribution)) +
geom_bar(position="stack") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant GENDER by SAMPLE",
caption = "",
y = "GENDER",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# GENDER by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_gender) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant GENDER by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "GENDER") +
theme_minimal()
# GENDER by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_gender) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( title = "Participant GENDER by SAMPLE and BLOCK",
caption = "As expected, similiar distributions within SAMPLE across BLOCKS",
x = "Stimulus Block", fill = "GENDER") +
theme_minimal()
As expected, the smaller Tumblr sample contained a greater
proportion of users identifiying with categories other than Male and
Female. However the relative distribution across gender categories
within each sample is roughly uniform and thus comparable.
df <- df_data
# GENDER (collapsed) by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_gender_collapsed, fill = Distribution)) +
geom_bar(position="stack") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant GENDER by SAMPLE",
caption = "",
y = "GENDER (collapsed)",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# GENDER (collapsed) by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_gender_collapsed) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant GENDER by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "GENDER (collapsed)") +
theme_minimal()
# GENDER (collapsed) by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_gender_collapsed) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( title = "GENDER (collapsed) by SAMPLE and BLOCK",
caption = "As expected, similiar distributions within SAMPLE across BLOCKS",
x = "Stimulus Block", fill = "GENDER (collapsed)") +
theme_minimal()
The relative distribution across gender categories (collapsed)
within each sample is roughly uniform and thus comparable.
Here we explore the free-text responses given for the survey question ‘Gender’ at the level ‘Prefer to Self Describe’.
df <- df_data
### DESCRIBE GENDER FREE REPONSE TEXT
#TABLE of Free Response Demographic Gender
title = "Participant Gender — Self Describe"
cols = c("Text","PROLIFIC","TUMBLR","TOTAL")
cont <- table(df$D_gender_4_TEXT, df$Distribution)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Text | PROLIFIC | TUMBLR | TOTAL |
|---|---|---|---|
| Agender | 1 | 0 | 1 |
| gender fluid | 0 | 1 | 1 |
| genderqueer | 0 | 2 | 2 |
| genderqueer (dislike term non-binary) | 0 | 1 | 1 |
| genderqueer trans man | 0 | 1 | 1 |
| Genderqueer wlw | 0 | 1 | 1 |
| genderqueer woman | 0 | 1 | 1 |
| My sex is female, but I don’t think of myself as any gender. I’m in a female body, but my being isn’t tied to this cultural construct. | 0 | 1 | 1 |
| queer | 0 | 1 | 1 |
| She/They | 0 | 1 | 1 |
| Trans male | 0 | 1 | 1 |
| Transgender male | 0 | 1 | 1 |
| transmasc | 0 | 1 | 1 |
| Sum | 1 | 13 | 14 |
## FOR DESCRIPTIVES PARAGRAPH
# #PROLIFIC
df.p <- df_data %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table()
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)
# #TUMBLR
df.t <- df_data %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table()
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)
240 individuals from Prolific participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).
Note that a higher proportion of participants recruited from Tumblr represent identities other than cis-gender Female and cis-gender Male. 78 individuals from Tumblr participated in Study 2, ( 36% Female, 5% Male, 40% Non-binary, 19% Other).
We have no particular expecations with respect to the distribution of education-levels across TUMBLR and PROLIFIC samples, but expect the distribution to be uniform across randomly-assigned stimulus blocks.
df <- df_data
# EDUCATION by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_education, fill = Distribution)) +
geom_bar(position="stack") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant EDUCATION by SAMPLE",
caption = "",
y = "EDUCATION",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# EDUCATION by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_education) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant EDUCATION by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "EDUCATION") +
theme_minimal()
# EDUCATION by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_education) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .)+
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( x = "Stimulus Block", fill = "EDUCATION",
title = "Participant EDUCATION by SAMPLE and BLOCK",
caption = "similiar distributions of education across blocks within samples") +
theme_minimal()
The relative distribution across education categories within each
sample is roughly uniform and thus comparable.
df <- df_data
# EDUCATION (collapsed) by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_education_collapsed, fill = Distribution)) +
geom_bar(position="stack") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant EDUCATION (collapsed) by SAMPLE",
caption = "",
y = "EDUCATION (collapsed)",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# EDUCATION (collapsed) by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_education_collapsed) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant EDUCATION (collapsed) by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "EDUCATION (collapsed)") +
theme_minimal()
# EDUCATION (collapsed) by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_education_collapsed) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .)+
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( x = "Stimulus Block", fill = "EDUCATION",
title = "Participant EDUCATION (collapsed) by SAMPLE and BLOCK",
caption = "similiar distributions of education across blocks within samples") +
theme_minimal()
The relative distribution across education categories (collapsed)
within each sample is roughly uniform and thus comparable.
Convenience sampling on both TUMBLR and PROLIFIC pools means we do not expect a uniform (i.e. equal) distribution of participants across political affiliations. Based on known demographics of TUMBLR and PROLIFIC we expect both samples to contain substantially more self-identifying DEMOCRATS than REPUBLICANS, with TUMBLR containing more OTHER and INDEPENDENT more of these users to identify as a category other than male or female. Due to random assignment to stimulus blocks, we expect that the distribution of participants in each political affiliation category is uniform within the sample source and across stimulus blocks.
df <- df_data %>% mutate(politicalParty = fct_rev(D_politicalParty))
# AFFILIATION by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = politicalParty, fill = Distribution)) +
geom_bar(position="stack") +
labs(
title = "Political AFFILIATION by SAMPLE",
caption = "",
y = "POLITICAL AFFILIATION",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# AFFILIATION by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(politicalParty) )) +
geom_bar(position = "stack") +
scale_fill_manual(values = my_palettes(name="politics", type = "discrete")) +
labs(
title = "Political AFFILIATION by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "AFFILIATION") +
theme_minimal()
# AFFILIATION by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df_data, aes( fill = D_politicalParty, x = Assigned.Block )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
scale_fill_manual(values = my_palettes(name="politics", type = "discrete")) +
labs( title = "Political AFFILIATION by SAMPLE and BLOCK",
caption = "Similar distribution across BLOCKS within SAMPLES", x = "Stimulus Block",
fill = "AFFILIATION") +
theme_minimal()
As expected, across both samples the participant pool skews toward
DEMOCRAT and INDEPENDENT affiliations with very few REPUBLICANS.
Importantly, the relative distribution of political affiliations within
a sample across stimulus blocks is roughly uniform, and thus
comparable.
In addition to political affiliation, participants were asked to rate their fiscal political values from left leaning to right leaning along a horizontal slider where left-leaning = 0 and right_leaning = 100.
df <- df_data
# FISCAL POLITICAL by SAMPLE
# VERTICAL SEMANTIC DIFFERENTIAL
leftside <- rep("left-leaning", nlevels(df$Distribution))
rightside <- rep("right-leaning", nlevels(df$Distribution))
g <- ggplot(df, aes(x=fct_rev(Distribution), y=D_politicsFiscal, color=Distribution)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
labs( title = "FISCAL VALUES by SAMPLE",
caption = "As expected, the smaller Tumblr sample is more left-leaning than the Prolific sample",
y = "Fiscal Politics", x = "", color = "Sample") +
theme_minimal() +
coord_flip()
g + guides(
y = guide_axis_manual(
breaks = c("PROLIFIC","TUMBLR"),
labels = leftside
),
y.sec = guide_axis_manual(
breaks = c("PROLIFIC","TUMBLR"),
labels = rightside
))
# FISCAL POLITICAL SAMPLE and BLOCK
# VERTICAL STACKED SEMANTIC DIFFERENTIAL
leftside <- rep("left-leaning", length(ref_blocks))
rightside <- rep("right-leaning", length(ref_blocks))
g <- ggplot(df, aes(x=fct_rev(Assigned.Block), y=D_politicsFiscal, color=Assigned.Block)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
facet_grid(Distribution ~ .)+
labs( title = "FISCAL VALUES by SAMPLE and Stimulus BLOCK",
caption = "similar distributions across BLOCKS within SAMPLE",
y = "Fiscal Politics ", x = "", color = "Block") +
theme_minimal() +
coord_flip()
g + guides(
y = guide_axis_manual(
breaks = ref_blocks,
labels = leftside
),
y.sec = guide_axis_manual(
breaks = ref_blocks,
labels = rightside
))
rm(g)
Across stimulus blocks within each sample, the distribution of fiscal political values is roughly uniform and thus comparable. It should be noted that both samples are largely left-leaning, consistent with the responses for Political Affiliation.
r_politics <- nrow(df_data %>% filter(D_politicsSocial >= 50)) / nrow(df_data)
l_politics <- nrow(df_data %>% filter(D_politicsSocial < 50)) / nrow(df_data)
r_fiscal <- nrow(df_data %>% filter(D_politicsFiscal >= 50)) / nrow(df_data)
l_fiscal <- nrow(df_data %>% filter(D_politicsFiscal < 50)) / nrow(df_data)
#PROLIFIC
df.p <- df_data %>% filter(Distribution == "PROLIFIC")
desc.fiscal.p <- psych::describe(df.p$D_politicsFiscal)
desc.social.p <- psych::describe(df.p$D_politicsSocial)
#TUMBLR
df.t <- df_data %>% filter(Distribution == "TUMBLR")
desc.fiscal.t <- psych::describe(df.t$D_politicsFiscal)
desc.social.t <- psych::describe(df.t$D_politicsSocial)
For the 240 participants recruited from Prolific, a spectrum of Social Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 100, with a mean value of 32.27, SD = 27.49. A spectrum of Fiscal Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 100, with a mean value of 39.4, SD = 29.06.
For the 78 participants recruited from Tumblr, a spectrum of Social Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 50, with a mean value of 9.09, SD = 11.06. A spectrum of Fiscal Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 91, with a mean value of 16.08, SD = 16.97.
Overall, 77.99% of respondents identify with left-leaning social values (vs) 22.01% identifying as right-leaning; while 66.35% of respondents reported left-leaning fiscal values (vs) 33.65% identify as right-leaning.
We have no expectation for distribution across income categories, but expect that the relative distribution across categories is uniform within each sample.
df <- df_data
# INCOME (collapsed) by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_income, fill = Distribution)) +
geom_bar(position="stack") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant INCOME by SAMPLE",
caption = "",
y = "INCOME",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# INCOME by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_income) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant INCOME by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "INCOME") +
theme_minimal()
# INCOME by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_income) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .)+
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( x = "Stimulus Block", fill = "INCOME",
title = "Participant INCOME by SAMPLE and BLOCK",
caption = "similiar distributions of across blocks within samples") +
theme_minimal()
Across stimulus blocks within each sample, the distribution of
income categories is roughly uniform and thus comparable.
We have no expectation for distribution across employment categories, but expect that the relative distribution across categories is uniform within each sample.
df <- df_data
# EMPLOYMENT STATUS by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = D_employmentStatus, fill = Distribution)) +
geom_bar(position="stack") +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant EMPLOYMENT STATUS by SAMPLE",
caption = "",
y = "EMPLOYMENT STATUS",
x = "(count)",
fill = "SAMPLE") +
theme_minimal()
# EMPLOYMENT by SAMPLE
# HORIZONTAL STACKED BAR
ggplot(data = df, aes( y = Distribution, fill = fct_rev(D_employmentStatus) )) +
geom_bar(position = "stack") +
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs(
title = "Participant EMPLOYMENT STATUS by SAMPLE",
caption = "",
x = "(count)",
y = "SAMPLE",
fill = "EMPLOYMENT") +
theme_minimal()
# EMPLOYMENT by SAMPLE and BLOCK
# VERTICAL FACET BAR CHART
ggplot(data = df, aes( x = Assigned.Block, fill = fct_rev(D_employmentStatus) )) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .)+
scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
labs( x = "Stimulus Block", fill = "EMPLOYMENT",
title = "Participant EMPLOYMENT by SAMPLE and BLOCK",
caption = "similiar distributions of across blocks within samples") +
theme_minimal()
Here we explore the free-text responses given for the survey question ‘Work Detail’.
df <- df_data
#TABLE of Free Response Work Detail
title = "Participant WORK DETAIL — Self Describe"
cols = c("Text","PROLIFIC","TUMBLR","TOTAL")
cont <- table(df$D_work_detail, df$Distribution)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>% kable_classic()
| Text | PROLIFIC | TUMBLR | TOTAL |
|---|---|---|---|
| “Productivity Analyst” (its systems engineering) | 0 | 1 | 1 |
| academic administration | 0 | 1 | 1 |
| accountant | 1 | 0 | 1 |
| Accountant | 1 | 0 | 1 |
| Accounting Student persuing my bachelor’s degree and working part-time under a CPA | 0 | 1 | 1 |
| Admin | 1 | 0 | 1 |
| Admin Asst-finance | 1 | 0 | 1 |
| Administration | 1 | 0 | 1 |
| Administrative assistant | 1 | 0 | 1 |
| Administrative Manger | 1 | 0 | 1 |
| Administrative staff | 1 | 0 | 1 |
| Advisor | 1 | 0 | 1 |
| Although I’m currently looking for a new job, I used to do mostly office related jobs, mostly on the computer. | 1 | 0 | 1 |
| Analyst management | 1 | 0 | 1 |
| Arborist, I do tree work | 1 | 0 | 1 |
| Art Teacher | 1 | 0 | 1 |
| Artist | 1 | 1 | 2 |
| Assembler I tried my best on this but i got the worst call im sorry | 1 | 0 | 1 |
| Assistant Manager | 1 | 0 | 1 |
| Attraction operator at an amusement park. | 0 | 1 | 1 |
| Audit Manager | 1 | 0 | 1 |
| Automotive technician | 0 | 1 | 1 |
| Behavior Technician | 0 | 1 | 1 |
| Beta manager | 1 | 0 | 1 |
| biomedical research | 1 | 0 | 1 |
| Biomedical Research | 1 | 0 | 1 |
| Building Manager | 1 | 0 | 1 |
| Business Developer | 1 | 0 | 1 |
| Case manager | 1 | 0 | 1 |
| Cashier | 1 | 1 | 2 |
| Claims specialist | 1 | 0 | 1 |
| Clinical dietitian | 1 | 0 | 1 |
| Clinical mental health counselor | 0 | 1 | 1 |
| College student | 1 | 0 | 1 |
| Computer Science | 1 | 0 | 1 |
| Consultant | 1 | 1 | 2 |
| consultation agent | 1 | 0 | 1 |
| content creator | 1 | 0 | 1 |
| content moderator | 1 | 0 | 1 |
| Cook | 1 | 0 | 1 |
| Court revenue assistant | 1 | 0 | 1 |
| Crew member in fast food or student | 0 | 1 | 1 |
| Crowdsourcing housekeeping and companionship | 1 | 0 | 1 |
| CSR/moderator/psychologist industrial organizational | 1 | 0 | 1 |
| currently unemployed | 1 | 0 | 1 |
| Custom Glass Installation | 1 | 0 | 1 |
| Customer care | 1 | 0 | 1 |
| customer service | 1 | 0 | 1 |
| data analysis | 1 | 0 | 1 |
| Data analyst. Move data around, pull data, analyze, create visualizations, etc | 1 | 0 | 1 |
| Data consultant | 1 | 0 | 1 |
| data entry | 1 | 0 | 1 |
| Data entry operator | 0 | 1 | 1 |
| Deli Production in a Grocery Store | 0 | 1 | 1 |
| Dental assistant | 1 | 0 | 1 |
| Design Director | 1 | 0 | 1 |
| director of finance | 0 | 1 | 1 |
| Disabled | 1 | 1 | 2 |
| Dog trainer | 1 | 0 | 1 |
| dog walker | 1 | 0 | 1 |
| donor research and data visualization for higher education fundraising | 0 | 1 | 1 |
| Ecologist | 0 | 1 | 1 |
| Educational Services | 1 | 0 | 1 |
| educator | 1 | 0 | 1 |
| Educator | 1 | 0 | 1 |
| engineer | 2 | 0 | 2 |
| Engineer | 1 | 0 | 1 |
| entry level manager | 1 | 0 | 1 |
| EO Analyst | 0 | 1 | 1 |
| Escape room game master | 0 | 1 | 1 |
| Events supervisor who manages the culinary team at catered events while also doing prep work when not catering | 1 | 0 | 1 |
| Executive Administrative Assistant | 1 | 0 | 1 |
| Executive Assistant | 1 | 0 | 1 |
| Face painter and customer service representative | 0 | 1 | 1 |
| Family service counselor | 1 | 0 | 1 |
| Fashion retail manager | 1 | 0 | 1 |
| Fast-food worker | 0 | 1 | 1 |
| finance | 1 | 0 | 1 |
| Financial broker | 1 | 0 | 1 |
| Financial counselor | 0 | 1 | 1 |
| Financial Manager | 1 | 0 | 1 |
| Fisheries management and research | 1 | 0 | 1 |
| Food Service Assistants | 1 | 0 | 1 |
| Fraud analyst | 1 | 0 | 1 |
| freelance ai data trainer | 0 | 1 | 1 |
| Freelance artist | 0 | 1 | 1 |
| Freelance photography | 1 | 0 | 1 |
| freelance writing | 1 | 0 | 1 |
| Freelancer (prefer not to elaborate) | 1 | 0 | 1 |
| Full time student at the moment but I used to work in public service as a bartender | 0 | 1 | 1 |
| Genetic Counselor | 1 | 0 | 1 |
| gig work | 1 | 0 | 1 |
| Grad student (MSW), part time Behavior Technician in Applied Behavior Analysis working with kids with autism | 0 | 1 | 1 |
| Grants Manager | 0 | 1 | 1 |
| Graphic Design | 1 | 0 | 1 |
| graphic designer | 2 | 0 | 2 |
| Health Care System Patient Access Manager | 1 | 0 | 1 |
| Health science research | 0 | 1 | 1 |
| High school teacher | 0 | 1 | 1 |
| homecare | 1 | 0 | 1 |
| homemaker | 3 | 1 | 4 |
| Homemaker | 1 | 0 | 1 |
| Homemaker past educator | 1 | 0 | 1 |
| Housekeeper | 1 | 0 | 1 |
| HR Specialist | 1 | 0 | 1 |
| Human Resources Specialist (Classification) | 0 | 1 | 1 |
| I am a digital analyst at a major retailer | 1 | 0 | 1 |
| I am a manager In my organization. I lead 5 people in my team. | 1 | 0 | 1 |
| I am a product owner for a master data management tool. | 1 | 0 | 1 |
| I am a retired nurse and Administrative Assistant. | 1 | 0 | 1 |
| I am a student and do freelance work mainly related to writing and research. | 1 | 0 | 1 |
| I am an assistant principal and a teacher | 1 | 0 | 1 |
| I am unemployed but looking for work. I have a data/regulatory compliance occupational background. | 1 | 0 | 1 |
| I am unemployed<33 | 0 | 1 | 1 |
| I design and implement the financial part of the ERP system. I ensure the overall architecture of the system. | 1 | 0 | 1 |
| I do personal color analysis for people and I also volunteer and take free specialized language courses (Advanced Judeo-Persian through Oxford, Classical Persian reading with a prof friend of mine) | 0 | 1 | 1 |
| I do small jobs on Prolific | 1 | 0 | 1 |
| I do surveys right now. I was in a car accident recently. I had been driving for Doordash | 1 | 0 | 1 |
| I have recently gotten a new job, so I have done mail processing work as well as bakery production while also freelance editing | 0 | 1 | 1 |
| I manage a department that designs training modules for the life sciences | 0 | 1 | 1 |
| I publish and manage product listings on internet platforms. | 1 | 0 | 1 |
| i receive disability income and do part time community outreach work | 0 | 1 | 1 |
| I supervise a 7 member team in the systems support division of my organization. We handle problem tickets and document the knowledge gained from identifying solutions. | 1 | 0 | 1 |
| I talk on the phone | 0 | 1 | 1 |
| I tell people that they did their taxes wrong and need to send more money, mostly. | 1 | 0 | 1 |
| I work as a scientist | 1 | 0 | 1 |
| I work in IT and data analysis. | 1 | 0 | 1 |
| I’m a cashier/front desk associate | 0 | 1 | 1 |
| I’m a graphic designer looking for work | 1 | 0 | 1 |
| I’m an ad rater for a search engine | 1 | 0 | 1 |
| I’m disabled | 0 | 1 | 1 |
| I’m self employed mostly doing transcription work. | 1 | 0 | 1 |
| Import Supervisor | 1 | 0 | 1 |
| Independent Contractor | 2 | 0 | 2 |
| Independent Contractor for IT | 1 | 0 | 1 |
| Information Technology | 1 | 0 | 1 |
| Intern/assistant | 1 | 0 | 1 |
| IT | 2 | 1 | 3 |
| IT Project Manager | 1 | 0 | 1 |
| IT Support | 1 | 0 | 1 |
| IT support and research | 1 | 0 | 1 |
| IT Support Technician | 1 | 0 | 1 |
| IT Technician | 1 | 0 | 1 |
| Kitchen Manager | 1 | 0 | 1 |
| Lab Technician | 1 | 0 | 1 |
| Law Associate | 1 | 0 | 1 |
| lead receptionist | 1 | 0 | 1 |
| legal assistant | 1 | 0 | 1 |
| Loan Specialist | 1 | 0 | 1 |
| looking for a position in biomedical engineering or chemistry | 0 | 1 | 1 |
| Manage family IT business | 1 | 0 | 1 |
| Management consultant | 1 | 0 | 1 |
| manager | 1 | 0 | 1 |
| Manager | 3 | 0 | 3 |
| Manager of Application Support | 1 | 0 | 1 |
| manager video editor | 1 | 0 | 1 |
| Marketing | 1 | 0 | 1 |
| Marketing and sales | 1 | 0 | 1 |
| master data specialist | 1 | 0 | 1 |
| medical assistant | 1 | 0 | 1 |
| medical laboratory scientist | 1 | 0 | 1 |
| military | 1 | 0 | 1 |
| n/a | 1 | 1 | 2 |
| N/A | 5 | 0 | 5 |
| N/A Matriculating to a medical school, beginning in August | 1 | 0 | 1 |
| Nanny | 2 | 0 | 2 |
| network engineer | 1 | 0 | 1 |
| Network technician | 1 | 0 | 1 |
| Nonprofit Outreach | 0 | 1 | 1 |
| Not applicable | 1 | 0 | 1 |
| not working at the moment but working toward archival work | 0 | 1 | 1 |
| nurse | 1 | 0 | 1 |
| Nurse | 2 | 0 | 2 |
| Office Manager | 1 | 0 | 1 |
| Operations (for a tech company) | 1 | 0 | 1 |
| Operations Lead | 1 | 0 | 1 |
| Operations Manager, take care of day-to-day life | 1 | 0 | 1 |
| Outdoor educator | 0 | 1 | 1 |
| Package Handler | 1 | 0 | 1 |
| Package-Handler/General Laborer | 1 | 0 | 1 |
| Paralegal | 1 | 0 | 1 |
| Parent | 0 | 1 | 1 |
| part-time factory worker, production | 1 | 0 | 1 |
| Payroll Manager | 1 | 0 | 1 |
| pet care specialist | 1 | 0 | 1 |
| PhD student | 0 | 1 | 1 |
| physical therapist | 1 | 0 | 1 |
| Post-baccalaureate research assistant | 0 | 1 | 1 |
| preschool teacher | 0 | 1 | 1 |
| Preschool teacher. | 0 | 1 | 1 |
| professional aide | 0 | 1 | 1 |
| Professional writer. Technical documentation + grant writing. | 0 | 1 | 1 |
| Professor | 2 | 0 | 2 |
| Program manager at a non profit. | 1 | 0 | 1 |
| project coordinator | 0 | 1 | 1 |
| Project Manager | 1 | 0 | 1 |
| Prolific | 1 | 0 | 1 |
| Property Manager | 1 | 0 | 1 |
| Property Preservation | 1 | 0 | 1 |
| Public safety | 1 | 0 | 1 |
| Quality Control Supervisor | 1 | 0 | 1 |
| r and d | 0 | 1 | 1 |
| R&D scientist | 1 | 0 | 1 |
| Reception | 1 | 0 | 1 |
| Regional Manager | 1 | 0 | 1 |
| Registered Behavior Technician | 1 | 0 | 1 |
| Repair Artist | 1 | 0 | 1 |
| represent buyers and sellers of real estate | 1 | 0 | 1 |
| Research and technical service | 1 | 0 | 1 |
| Research assistant | 0 | 1 | 1 |
| research associate | 1 | 0 | 1 |
| Research Associate 2 | 1 | 0 | 1 |
| Research compensation options | 1 | 0 | 1 |
| Research coordinator | 1 | 0 | 1 |
| Restaurant | 1 | 0 | 1 |
| retail sales | 1 | 0 | 1 |
| Retail worker, low level | 1 | 0 | 1 |
| retired | 1 | 0 | 1 |
| Retired | 0 | 1 | 1 |
| retired but have on-line hobby business | 1 | 0 | 1 |
| sales | 1 | 0 | 1 |
| Sales | 1 | 0 | 1 |
| sales and business | 1 | 0 | 1 |
| sales associate | 1 | 0 | 1 |
| Search analyst | 1 | 0 | 1 |
| Self employed - owner | 1 | 0 | 1 |
| self employed wellness business | 1 | 0 | 1 |
| self-employed | 1 | 0 | 1 |
| Self-employed | 1 | 0 | 1 |
| Self-Employed | 1 | 0 | 1 |
| Senior Internet Analyst | 1 | 0 | 1 |
| Shift Supervisor | 0 | 1 | 1 |
| simulations engineer for power plants | 0 | 1 | 1 |
| Social media moderation | 1 | 0 | 1 |
| Social worker | 0 | 1 | 1 |
| software dev | 1 | 0 | 1 |
| Software Engineering | 0 | 1 | 1 |
| Solutions Engineer | 1 | 0 | 1 |
| Staff Accountant | 1 | 0 | 1 |
| Stakeholder engagement | 1 | 0 | 1 |
| Statistician | 1 | 0 | 1 |
| Stay at home parent | 1 | 0 | 1 |
| Store manager | 1 | 0 | 1 |
| student | 0 | 2 | 2 |
| Student | 3 | 2 | 5 |
| Student (majoring in Information Technology) | 1 | 0 | 1 |
| student doctor | 1 | 0 | 1 |
| Student, supported by parents :P | 0 | 1 | 1 |
| Sub Teacher | 1 | 0 | 1 |
| survey | 1 | 0 | 1 |
| Teacher | 0 | 1 | 1 |
| teacher assistant | 1 | 0 | 1 |
| Teacher’s assistant | 0 | 1 | 1 |
| teaching faculty, large lecture of introductory programming | 0 | 1 | 1 |
| Tech support | 1 | 0 | 1 |
| technology engineer | 1 | 0 | 1 |
| Technology, manager | 1 | 0 | 1 |
| therapist | 1 | 0 | 1 |
| Transcriptionist | 1 | 0 | 1 |
| Transportation manager | 1 | 0 | 1 |
| Tutor | 1 | 0 | 1 |
| Unemployed | 7 | 2 | 9 |
| Unemployed - Accountant | 1 | 0 | 1 |
| Unemployed - Previously cashier | 0 | 1 | 1 |
| Unemployed (Previously: Ad Rater) | 1 | 0 | 1 |
| Unemployed (student) | 1 | 0 | 1 |
| Unemployed (Student) | 0 | 1 | 1 |
| Unemployed and seeking work | 1 | 0 | 1 |
| unemployed at the moment because I’m working on graduate school applications | 0 | 1 | 1 |
| Unemployed-Laborer | 1 | 0 | 1 |
| Unemployed/NA | 1 | 0 | 1 |
| Unpaid household labor | 1 | 0 | 1 |
| User Research | 0 | 1 | 1 |
| ux designer | 1 | 0 | 1 |
| Visual designer, clerical work, web design | 0 | 1 | 1 |
| VP of Non profit (unpaid) | 1 | 0 | 1 |
| Warehouse manager | 1 | 0 | 1 |
| Well i run a tumblr account and i run commissions! So uh, all of them? | 0 | 1 | 1 |
| Wildlife Tech | 1 | 0 | 1 |
| woodworker | 1 | 0 | 1 |
| Writer, Tutor | 0 | 1 | 1 |
| Sum | 239 | 77 | 316 |
df <- df_graphs %>% select(PID, Assigned.Block, Distribution, D_politicsSocial, D_politicsFiscal, D_politicalParty) %>%
mutate(
d_social = D_politicsSocial,
d_fiscal = D_politicsFiscal
)
ggplot(df, aes(x = d_social, y = d_fiscal, color = D_politicalParty)) +
geom_point() +
geom_hline(yintercept = 50) +
geom_vline(xintercept = 50) +
facet_grid(Distribution~D_politicalParty)+
labs(
title = "Social and Fiscal Political Values by Political Party Affiliation",
x = "Social Values", y = "Fiscal Values"
)+
theme_minimal() +
easy_remove_legend()
df <- df_data
#PLATFORM CHOICE by Age
## FACETED STACKED BAR CHART
ggplot( df, aes( x = D_age, fill = PLATFORM)) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
labs( title = "PLATFORM CHOICE by SAMPLE and AGE",
caption = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
easy_add_legend_title("") +
theme_minimal()
# qacBase::crosstab(df, D_age,PLATFORM, plot=TRUE)
df <- df_data
#Education by Income
## FACETED STACKED BAR CHART
ggplot( df, aes( x = D_education_collapsed, fill = D_income)) +
geom_bar(position = "stack") +
facet_grid(Distribution ~ .) +
labs( title = "Education (collapsed) by SAMPLE and INCOME",
caption = "", x = "") +
scale_fill_viridis(discrete=TRUE, option="viridis") +
easy_add_legend_title("") +
theme_minimal()
wip code stash
#
# #GET JUST TUMBLRS
# df <- df_data %>%
# filter(Distribution == "PROLIFIC") %>%
# select(D_gender_collapsed, D_education_collapsed, D_income, D_age, D_politicalParty) %>%
# dplyr::rename(
# GENDER = D_gender_collapsed,
# EDUCATION = D_education_collapsed,
# INCOME = D_income,
# AGE = D_age,
# POLITICS = D_politicalParty
# )
#
#
# #UPPER IS BAR PLOT, LOWER IS DENSITY AUTOPOINT
# ggpairs(data = df, columns = 1:ncol(df),
# showStrips = TRUE,
# upper = list(discrete ="facetbar"),
# lower = list(discrete = "autopoint")) + theme_minimal() +
# labs(
# title = "Demographic Summary for TUMBLR"
# )
#
# #DEFAULT PAIRS
# ggpairs(data = df, columns = 1:ncol(df)) + theme_minimal() + labs(
# title = "Demographic Summary for TUMBLR"
# )